*------------------------------------------
* Global path macros - edit root before running
*------------------------------------------
* Every other location is derived from root. data and output end in a slash,
* figures and tables do not (both point at the same folder).
global root    "INSERT CUSTOMIZED PATH"
global data    "$root/data/"
global output  "$root/data/processed/HealthProcess/"
global figures "$root/figsandtabs"
global tables  "$root/figsandtabs"
*------------------------------------------
* NOTE(review): the graph commands further down expand the globals size, ls and
* legs, which are not defined in this script - confirm a calling do-file sets them.

* Graph scheme, and a fixed seed for any later command that draws random numbers
set scheme s1mono
set seed 12345

* Width (in years) of the age bands built by the egen cut() calls below
scalar agewide = 5

* ---- Build the pooled GKV (shi==1) + PKV (shi==0) sample for the raw comparison ----
* Paths use forward slashes throughout: Stata accepts them on every platform,
* whereas backslashes only work on Windows.

* GKV extract; stringcols(1) forces the first column to be read as a string
import delimited using "${data}/raw/GKV/ACG2010", clear stringcols(1)

keep patient_id age sex unscaled_total_cost_predicted_ri

gen pseud=patient_id

* The extract from file ACG2010 is stamped as year 2011
gen year=2011

* dpcomma tells destring to treat commas as the decimal mark
destring unscaled_total_cost_predicted_ri , gen(lambdastar0) dpcomma

gen shi=1

* Add the PKV rows; shi is missing for them after the append and is set to 0 below
append using "${data}/raw/PKV/MainPKVSample.dta", keep(alter female year ID_TAR ID_VPNR deductible expenditure lambdastar0)

replace shi=0 if shi==.

* Age bands of width agewide starting at 25; with the label option the codes run 0,1,2,...
* alter only exists on the appended PKV rows at this point, so agegr is missing for GKV rows.
egen agegr = cut(alter), at(25(`=agewide')100) label

* Top-code: every band above code floor(50/agewide) (the band starting at 25+50=75
* when agewide divides 50) is folded into that band
scalar cat75=floor(50/`=agewide')
replace agegr=`=cat75' if agegr>`=cat75'&agegr<.

gen civserv=(ID_TAR>=104)*(ID_TAR~=150)  /* ID_TAR 150 is for civil servants who don't get beihilfe */

* First year in which each ID_VPNR appears
bysort ID_VPNR: egen firsty=min(year)

* Analysis sample: valid age band, first observed before 2009, deductible below 500
gen anasamp=(agegr~=.)*(firsty<2009)*(deductible<500)
replace anasamp=0 if anasamp==1&civserv==1&deductible>250 /*  Deductible of civil servants apply only to half of costs */

format lambdastar0 %15.0fc

* Overlay of the unweighted ACG-score distributions, scores below 10 only:
*   grey bars      - rows with year==2011 and anasamp==1 (legend: PKV Sample)
*   black outlines - rows with shi==1 and age>=24        (legend: GKV Sample)
* Earlier single-sample versions, kept for reference:
*histogram lambdastar0 if lambdastar0<10&year==2011&anasamp==1, start(0) width(0.1)  fcolor(none) lcolor(black)

*histogram lambdastar0 if age>=24&shi==1, start(0) width(0.1)  fcolor(none) lcolor(black)

twoway ///
    (histogram lambdastar0 if lambdastar0<10&year==2011&anasamp==1, start(0) width(0.1) color(gs10)) ///
    (histogram lambdastar0 if lambdastar0<10&age>=24&shi==1, start(0) width(0.1)  fcolor(none) lcolor(black)), ///
    xtitle("ACG Score ({&lambda}{superscript:{&lowast}}{sub:t})", $size height(7)) ///
    ytitle(, $size) ///
    xlabel(, format(%9.0g) $ls) ylabel(, $ls) ///
    legend(ring(0) pos(1) size($legs) rows(2) order(1 "PKV Sample" 2 "GKV Sample"))

* Forward slash keeps the export path valid on non-Windows systems as well
graph export "${figures}/acgcompraw.pdf",  replace

/* Next: reweight by age/sex */

* Copy age and sex of the GKV rows into the PKV variables alter and female
* (sex==2 is mapped to female=1, sex==1 to female=0)
replace alter=age if alter==.
replace female=1 if sex==2
replace female=0 if sex==1

* PKV rows (shi==0) are kept only when they belong to the analysis sample
drop if anasamp==0 & shi==0

* Per age-by-sex cell: tshi = GKV rows, totp = all rows, tphi = remaining (PKV) rows
bysort alter female: egen tshi=total(shi)
bysort alter female: gen totp=_N
gen tphi=totp-tshi

* Cell weight proportional to PKV rows per GKV row, rounded because fweights must be integers.
* Cells without GKV rows give a missing weight (division by zero).
* NOTE(review): tphi counts PKV rows from every year still in memory, while the
* plot below restricts the PKV series to year==2011 - confirm this is intended.
* NOTE(review): the scaling factor 40 is not explained anywhere in this file.
gen casew=round(40*tphi/tshi)

* Quick visual check of the weight distribution
histogram casew

* Same overlay as the raw comparison, but the GKV series is frequency-weighted by casew
twoway ///
    (histogram lambdastar0 if lambdastar0<10&year==2011&anasamp==1, start(0) width(0.1) color(gs10)) ///
    (histogram lambdastar0 if lambdastar0<10&age>=24&shi==1 [fw=casew], start(0) width(0.1)  fcolor(none) lcolor(black)), ///
    xtitle("ACG Score ({&lambda}{superscript:{&lowast}}{sub:t})", $size height(7)) ///
    ytitle(, $size) ///
    xlabel(, format(%9.0g) $ls) ylabel(, $ls) ///
    legend(ring(0) pos(1) size($legs) rows(2) order(1 "PKV Sample" 2 "GKV Sample"))

* Forward slash keeps the export path valid on non-Windows systems as well
graph export "${figures}/acgcompweight.pdf",  replace


/* Descriptives: all */

* Table layout: one row per variable plus a final N row; four columns
*   1: GKV rows with age>=24             2: year==2011 analysis sample
*   3: as column 1, alter in [25,35]     4: as column 2, alter in [25,35]
* Only r(mean) and r(N) are read, so summarize runs with meanonly instead of
* detail (no percentiles are computed; means and counts are unchanged).

* Dummy first row so rows can be stacked with the row-join operator; stripped after the loop
matrix sumstatw=J(1,4,1)

foreach var of varlist alter female lambdastar0 {

    qui su `var' if age>=24&shi==1, meanonly
    matrix varcheck=r(mean)
    matrix nob=r(N)

    qui su `var' if year==2011&anasamp==1, meanonly
    matrix varcheck=varcheck,r(mean)
    matrix nob=nob,r(N)

    qui su `var' if age>=24&shi==1&inrange(alter,25,35), meanonly
    matrix varcheck=varcheck,r(mean)
    matrix nob=nob,r(N)

    qui su `var' if year==2011&anasamp==1&inrange(alter,25,35), meanonly
    matrix varcheck=varcheck,r(mean)
    matrix nob=nob,r(N)

    matrix sumstatw=sumstatw\varcheck
}

* nob is overwritten on every pass, so the N row holds the counts of the last
* variable in the loop (lambdastar0)
matrix sumstatw=sumstatw\nob

* Drop the dummy first row
matrix sumstatw=sumstatw[2...,1...]

matrix rownames sumstatw= "Age (in years)" "Female" "ACG Score" "N"

* Forward slash keeps the output path valid on non-Windows systems as well
estout matrix(sumstatw, fmt("%9.1fc 3 3 %9.0fc" "%9.1fc 3 3 %9.0fc" "%9.1fc 3 3 %9.0fc" "%9.1fc 3 3 %9.0fc")) using "${tables}/sumstatswgkv.txt", collabels(,none) mlabels(,none) style(tex) replace


/* Generate clustering */

* Rebuild the GKV data from scratch, this time with both extracts stacked.
* Paths use forward slashes so the script also runs on non-Windows systems.

* Extract from file ACG2010, stamped as year 2011, parked in a temp file
import delimited using "${data}/raw/GKV/ACG2010", clear stringcols(1)

keep patient_id age sex unscaled_total_cost_predicted_ri

* dpcomma tells destring to treat commas as the decimal mark
destring unscaled_total_cost_predicted_ri , gen(lambdastar0) dpcomma

gen shi=1

gen year=2011

save "${data}/temp/ACG2010proc", replace

* Extract from file ACG2011, stamped as year 2012, processed the same way
import delimited using "${data}/raw/GKV/ACG2011", clear stringcols(1)

keep patient_id age sex unscaled_total_cost_predicted_ri

destring unscaled_total_cost_predicted_ri , gen(lambdastar0) dpcomma

gen shi=1

gen year=2012

* Stack the first extract underneath
append using "${data}/temp/ACG2010proc"

* Numeric patient identifier for the GKV rows (patient_id itself is a string)
egen patid=group(patient_id)
duplicates report patid

* From here on GKV age lives in the PKV variable name alter
rename age alter

drop patient_id

* Add the PKV rows; forward slashes keep the path portable across platforms
append using "${data}/raw/PKV/MainPKVSample.dta", keep(alter female year ID_TAR ID_VPNR deductible expenditure lambdastar0)

replace shi=0 if shi==.

* Give GKV rows panel IDs above the largest PKV ID so the two ID ranges cannot collide
su ID_VPNR
replace ID_VPNR=r(max)+patid if ID_VPNR==.

* Age bands of width agewide starting at 25; with the label option the codes run 0,1,2,...
* NOTE(review): the upper bound here is 110, while the earlier section of this
* file uses 100 - confirm which one is intended.
egen agegr = cut(alter), at(25(`=agewide')110) label

* Top-code: every band above code floor(50/agewide) is folded into that band
scalar cat75=floor(50/`=agewide')
replace agegr=`=cat75' if agegr>`=cat75'&agegr<.

gen civserv=(ID_TAR>=104)*(ID_TAR~=150)  /* ID_TAR 150 is for civil servants who don't get beihilfe */

* First year in which each ID_VPNR appears
bysort ID_VPNR: egen firsty=min(year)

* Analysis sample: valid age band, first observed before 2009, deductible below 500.
* GKV rows only carry years 2011 and 2012, so they can never satisfy firsty<2009.
gen anasamp=(agegr~=.)*(firsty<2009)*(deductible<500)
replace anasamp=0 if anasamp==1&civserv==1&deductible>250 /*  Deductible of civil servants apply only to half of costs */

* Map the GKV sex coding onto the PKV female indicator
replace female=1 if sex==2
replace female=0 if sex==1


format lambdastar0 %15.0fc

* ---- Predict expenditure from lagged ACG scores and cluster the predictions ----
* Declare the panel (ID_VPNR by year) so the lag operators L. and L2. work
xtset ID_VPNR year

* One- and two-year lags of the ACG score
gen l1lam=L.lambdastar0
gen l2lam=L2.lambdastar0

* For GKV rows (shi==1) the current score and its one-year lag are used instead
replace l1lam=lambdastar0 if shi==1
replace l2lam=L.lambdastar0 if shi==1

* mvrs is not an official Stata command.
* NOTE(review): presumably the user-written multivariable regression spline package;
* the loops below assume it leaves basis variables l1lam_0..l1lam_3 and
* l2lam_0..l2lam_3 in the dataset - confirm against the installed version.
mvrs, all degree(3): regress expenditure l1lam l2lam if anasamp==1&shi==0

* All 16 pairwise products of the two sets of basis variables
foreach var of varlist l1lam_0 l1lam_1 l1lam_2 l1lam_3 {
foreach varb of varlist l2lam_0 l2lam_1 l2lam_2 l2lam_3 {
gen `var'`varb'=`var'*`varb'
}
}

* The regressor list is a variable RANGE: it picks up every variable stored between
* l1lam_0 and l1lam_3l2lam_3, so it depends on the creation order above.
* The model is fitted on shi==0 analysis-sample rows only.
reg expenditure l1lam_0- l1lam_3l2lam_3 if anasamp==1&shi==0
* predict has no if-qualifier, so pexp is also filled for rows outside the estimation sample
predict pexp

* Seven k-means clusters of predicted expenditure, estimation-sample rows only
* NOTE(review): starting centres presumably depend on the seed set at the top of the file - confirm
cluster kmeans pexp if anasamp==1&shi==0, k(7) generate(cere7)

* Renumber the clusters so that the codes increase with mean predicted expenditure
bysort cere7: egen cere7b=mean(pexp)
egen cere7bb=group(cere7b) if cere7~=.
replace cere7=cere7bb
drop cere7bb cere7b


* Extended cluster label: starts as the estimation-sample label and is then filled
* in for rows that were not part of the clustering (anasamp==0 or shi==1)
generate cere7b = cere7

forvalues k = 1/7 {
    * Range of predicted expenditure spanned by cluster k within the estimation sample
    summarize pexp if shi == 0 & anasamp == 1 & cere7 == `k'
    * Any other row whose prediction falls inside that range receives label k;
    * predictions that fall between two cluster ranges stay unlabelled
    replace cere7b = `k' if (shi == 1 | anasamp == 0) & pexp <= r(max) & pexp >= r(min)
    * Running cross-check of labels against sample membership
    tabulate cere7b anasamp
}


* Flag shi==0 rows outside the analysis sample with a missing shi so they can be dropped later
replace shi = . if anasamp == 0 & shi == 0

* ---- Share of each cluster within age band, GKV rows only ----
* matcell() stores the raw cell counts (age band by cluster) in matrix lamfreq
tab agegr cere7b if  shi==1, row nofreq matcell(lamfreq)

matrix list lamfreq

* Convert counts to row shares in Mata; the second line only prints them to the log
mata : freqs = st_matrix("lamfreq")
mata : freqs :/ rowsum(freqs)

mata: st_matrix("lamfreq2", freqs :/ rowsum(freqs))

* Row labels are hard-coded: they assume 5-year bands and that all 11 age bands
* occur among the GKV rows.
* (A previous assignment from the global rown2 was removed: that global is not
* defined in this file and the labels were overwritten by this statement anyway.)
matrix rownames lamfreq2 = "25-30" "30-35" "35-40" "40-45" "45-50" "50-55" "55-60" "60-65" "65-70" "70-75" "75+"

* Forward slash keeps the output path valid on non-Windows systems as well
estout matrix(lamfreq2, fmt(3)) using "${tables}/lamfreqgkv.txt", style(tex) replace ///
mlabels(,none) collabels(,none) eqlabels(,none) 


* ---- Cluster distribution by age band and insurance type ----
* One indicator per cluster (noomit keeps all categories): _Icere7b_1, _Icere7b_2, ...
xi, noomit i.cere7b

* Cell means of the indicators give cluster shares; clients counts rows with a cluster label
collapse _Icere7b_*  (count) clients=cere7b, by(agegr shi)

* Remove cells for rows flagged with missing shi, and rows without an age band
drop if shi==.

drop if agegr==.

* Forward slashes keep the output paths valid on non-Windows systems as well
save "${output}/acgdist", replace

* Cluster shares for the youngest age band (code 0) of the GKV sample
export delimited _I* if agegr==0 & shi==1 using "${output}/acgdist25shi.csv", replace
